# Import the Data File
library(readr)

Air_Quality_Health_Data <- read_csv("air_quality_health_monthly.csv")
## Rows: 498 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): city, year_month, population_density
## dbl (9): aqi, pm2_5, pm10, no2, o3, temperature, humidity, hospital_admissio...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(Air_Quality_Health_Data)

MyData <- Air_Quality_Health_Data   # Renames the data file

library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2   3.5.1      ✔ fma       2.5   
## ✔ forecast  8.24.0     ✔ expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.4.1
## 
library(TTR)

attributes(MyData)
## $row.names
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## [487] 487 488 489 490 491 492 493 494 495 496 497 498
## 
## $names
##  [1] "city"                "year_month"          "aqi"                
##  [4] "pm2_5"               "pm10"                "no2"                
##  [7] "o3"                  "temperature"         "humidity"           
## [10] "hospital_admissions" "hospital_capacity"   "population_density" 
## 
## $spec
## cols(
##   city = col_character(),
##   year_month = col_character(),
##   aqi = col_double(),
##   pm2_5 = col_double(),
##   pm10 = col_double(),
##   no2 = col_double(),
##   o3 = col_double(),
##   temperature = col_double(),
##   humidity = col_double(),
##   hospital_admissions = col_double(),
##   hospital_capacity = col_double(),
##   population_density = col_character()
## )
## 
## $problems
## <pointer: 0x12c7d4950>
## 
## $class
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
plot(MyData)

# Extracts the numerical columns
df_numeric <- MyData[, sapply(MyData, is.numeric)]

# Loop through each numeric column and plot ACF
for (colname in names(df_numeric)){
  series <- ts(df_numeric[[colname]])
  
# Open a new plotting window for each
  acf(series, main = paste("ACF for", colname))
}

# Mean Forecast 
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]])  # convert to time series object
  
  model_meanf <- meanf(series,12)
  acc_meanf <- accuracy(model_meanf)
  
  plot(model_meanf, main = paste("Mean Forecast for", colname))
}

# Naive Forecast
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]])  # convert to time series object
  
  model_naivef <- naive(series,12)
  acc_naivef <- accuracy(model_naivef)
  
  plot(model_naivef, main = paste("Naive Forecast for", colname))
}

# Random Walk 
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]])  # convert to time series object
  
  model_rwf <- rwf(series,12, drift = TRUE)
  acc_rwf <- accuracy(model_rwf)
  
  plot(model_rwf, main = paste("Random Walk Forecast for", colname))
}

# Seasonal Naive
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]])  # convert to time series object
  
  model_snaivef <- snaive(series,12, drift = TRUE)
  acc_snaivef <- accuracy(model_snaivef)
  
  plot(model_snaivef, main = paste("Seasonal Naive Forecast for", colname))
}

# Moving Averages
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]])  # convert to time series object
  
  model_MA5 <- ma(series, order = 5)
  model_MA5f <- forecast(model_MA5,12)
  
  acc_MA5f <- accuracy(model_MA5f)
  
  plot(model_MA5f, main = paste("Moving Average (with the (Order 5) Forecast for", colname))
}
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]])  # convert to time series object
  
  model_MA9 <- ma(series, order = 9)
  model_MA9f <- forecast(model_MA9,12)
  
  acc_MA9f <- accuracy(model_MA9f)
  
  plot(model_MA9f, main = paste("Moving Average (with the (Order 9) Forecast for", colname))
}
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

# Holt Winters
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]], frequency = 12)  # convert to time series object
  
  model_hw <- HoltWinters(series) # Fit Holt-Winters model
  model_hwf <- forecast(model_hw,12)
  
  acc_hwf <- accuracy(model_hwf)
  
  plot(model_hwf, main = paste("Holt-Winters Forecast for", colname))
}

## Warning in HoltWinters(series): optimization difficulties: ERROR:
## ABNORMAL_TERMINATION_IN_LNSRCH

# Simple Smoothing
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]], frequency = 12)  # convert to time series object
  
  model_ses_hw <- HoltWinters(series, beta = FALSE, gamma = FALSE)
  model_ses_hwf <- forecast(model_ses_hw,12)
  
  acc_ses_hwf <- accuracy(model_ses_hwf)
  
  plot(model_ses_hwf, main = paste("Simple Smoothing (Holt-Winters) Forecast for", colname))
}

# Decomposition
for (colname in names(df_numeric)) {
  series <- ts(df_numeric[[colname]], frequency = 12)  # convert to time series object
  
  model_ets <- ets(series)   # Model for the ETS model
  model_ets_forecast <- forecast(model_ets,h = 12)
  
  acc_ets_forecast <- accuracy(model_ets_forecast)
  
  plot(model_ets_forecast, main = paste("ETS Model Forecast for", colname))
  
  # Access specific attributes if you wish
  print(paste("MSE for", colname, ":", model_ets$mse))
  print(attributes(model_ets))  # shows model components
}

## [1] "MSE for aqi : 8528.45773264847"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for pm2_5 : 93.5670040960724"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for pm10 : 172.484814732366"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for no2 : 40.7302124576175"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for o3 : 62.8597900178732"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for temperature : 69.3211126058046"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for humidity : 173.63845479386"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for hospital_admissions : 376.279802606846"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"

## [1] "MSE for hospital_capacity : 221794.400500221"
## $names
##  [1] "loglik"     "aic"        "bic"        "aicc"       "mse"       
##  [6] "amse"       "fit"        "residuals"  "fitted"     "states"    
## [11] "par"        "m"          "method"     "series"     "components"
## [16] "call"       "initstate"  "sigma2"     "x"         
## 
## $class
## [1] "ets"
# Plot time series and different models in one chart
for (colname in names(df_numeric)) {
  
  series <- ts(df_numeric[[colname]], frequency = 12)
  
  # --- Fit all models ---
  model_meanf <- meanf(series, 12)
  model_naivef <- naive(series, 12)
  model_rwf <- rwf(series, 12, drift = TRUE)
  model_snaivef <- snaive(series, 12)
  model_hw <- HoltWinters(series)
  model_hwf <- forecast(model_hw, 12)
  model_ses_hw <- HoltWinters(series, beta = FALSE, gamma = FALSE)
  model_ses_hwf <- forecast(model_ses_hw, 12)
  model_ets <- ets(series)
  model_ets_forecast <- forecast(model_ets, 12)
  
  # --- Plot original series first ---
  plot(series, main = paste("Forecast Comparison for", colname),
       xlim = c(time(series)[1], time(series)[length(series)] + 12/12),
       ylim = range(c(series, model_meanf$mean, model_naivef$mean, 
                      model_rwf$mean, model_snaivef$mean,
                      model_hwf$mean, model_ses_hwf$mean, model_ets_forecast$mean)),
       col = "black", lwd = 2, ylab = "Value", xlab = "Time")
  
  # --- Add forecasts from all models ---
  lines(model_meanf$mean, col = "blue", lwd = 2, lty = 1)
  lines(model_naivef$mean, col = "red", lwd = 2, lty = 2)
  lines(model_rwf$mean, col = "green", lwd = 2, lty = 3)
  lines(model_snaivef$mean, col = "purple", lwd = 2, lty = 4)
  lines(model_hwf$mean, col = "orange", lwd = 2, lty = 5)
  lines(model_ses_hwf$mean, col = "brown", lwd = 2, lty = 6)
  lines(model_ets_forecast$mean, col = "pink", lwd = 2, lty = 7)
  
  # --- Add legend ---
  legend("topleft",
         legend = c("Original Series", "Mean", "Naive", "Random Walk", "Seasonal Naive",
                    "Holt-Winters", "Simple Smoothing", "ETS"),
         col = c("black", "blue", "red", "green", "purple", "orange", "brown", "pink"),
         lty = c(1,1,2,3,4,5,6,7),
         lwd = 2, cex = 0.8)
}

## Warning in HoltWinters(series): optimization difficulties: ERROR:
## ABNORMAL_TERMINATION_IN_LNSRCH

# Accuracy measure used for comparison is RMSE and MAPE
# Out of all the models, the Root Mean Squared Error (RMSE) and Mean Absolute Percentage Error (MAPE) for moving averages model of the order 9 is 73.06942 and 3.920257, respectively. It is the lowest across all the models therefore the moving average with the order 9 provides better accuracy in your forecast for the next 12 months.